/*------------------------------------------------------------------------------

Publication: Persson, M., Schakel, W. and Sundell, A. (2023) "A Man's World? The
	Policy Representation of Women and Men in a Comparative Perspective."
	Perspectives on Politics.

Description: This do-file merges the datasets used for the analyses in the
	paper. It is not necessary to run this do-file to replicate the paper,
	because this do-file constructs the data files that are provided with the
	supplementary material. However, the file is still included with the
	replication materials for the sake of transparency, as it shows how the
	various sources were transformed into the data file used in the analyses.

Sections:

	1. Women in National Parliaments
	2. Quality of Government
	3. Varieties of Democracy
	4. World Values Survey/European Values Study
	5. Final data preparation

Data: "persson_pop_data_slim," source data for public opinion and policy change
	information, not publicly available

	Historical Data on Women in National Parliaments, downloaded in October
	2021 from: https://data.ipu.org/historical-women

	Quality of Government Standard Dataset, downloaded in October 2021 from:
	https://www.gu.se/en/quality-government/qog-data/data-downloads/standard-
	dataset

	Varieties of Democracy Dataset Version 11, downloaded in October 2021 from:
	https://www.v-dem.net/data/dataset-archive/

	World Values Survey trend file, downloaded in October 2021 from:
	https://www.worldvaluessurvey.org/WVSEVStrend.jsp
	
	European Values Study trend file, downloaded in October 2021 from:
	https://doi.org/10.4232/1.13736

------------------------------------------------------------------------------*/

* Folder locations of the input files. Replace each placeholder with the
* directory on your machine before running the do-file.

* Main public opinion/policy change data; the merged file is saved here too
global data_main "YOUR\PATH\HERE"

* Historical data on women in national parliaments (Excel file)
global data_ipu "YOUR\PATH\HERE"

* Quality of Government standard time-series dataset (CSV file)
global data_qog "YOUR\PATH\HERE"

* Varieties of Democracy core dataset (CSV file)
global data_vdem "YOUR\PATH\HERE"

* European Values Study trend file (Stata file)
global data_evs "YOUR\PATH\HERE"

* World Values Survey trend file (Stata file)
global data_wvs "YOUR\PATH\HERE"

/*------------------------------------------------------------------------------
 1. Women in National Parliaments
------------------------------------------------------------------------------*/

* Read the spreadsheet; its first row supplies the variable names
import excel																///
	"$data_ipu\women_in_parliament-historical_database-1945_to_2018.xlsx",	///
	firstrow clear

* Only lower and single chambers are retained
keep if ChamberType == "Lower" | ChamberType == "Single" |					///
	ChamberType == "Single Lower?"

* c holds the country name in the spelling used as merge key further below
gen c = Country
replace c = "Germany" if Country == "Germany (Federal Republic Of)"
replace c = "Great Britain" if Country == "United Kingdom"
replace c = "South Korea" if Country == "Republic of Korea"
replace c = "United States" if Country == "United States of America"

* Restrict to the 43 countries of interest. inlist() accepts only a limited
* number of string arguments, hence the split into several calls.
keep if inlist(c, "Australia", "Austria", "Belgium", "Bulgaria", "Canada",	///
		"Chile", "Croatia", "Cyprus", "Czech Republic") |					///
	inlist(c, "Denmark", "Estonia", "Finland", "France", "Germany",			///
		"Great Britain", "Greece", "Hungary", "Iceland") |					///
	inlist(c, "Ireland", "Israel", "Italy", "Japan", "Latvia",				///
		"Lithuania", "Luxembourg", "Malta", "Mexico") |						///
	inlist(c, "Montenegro", "Netherlands", "New Zealand", "Norway",			///
		"Poland", "Portugal", "Romania", "Serbia", "Slovakia") |			///
	inlist(c, "Slovenia", "South Korea", "Spain", "Sweden",					///
		"Switzerland", "Turkey", "United States")

* Data for Chile is missing, so we supplement it manually using information from
* https://data.ipu.org/content/chile?chamber_id=13362 ("Percentage of women",
* downloaded October 2021). Eight empty rows are appended and then filled in.
scalar N1 = _N
set obs `=N1 + 8'
replace c = "Chile" if c == ""

* Election years and the matching shares, in the same order
local chile_year 1990 1993 1998 2001 2005 2009 2013 2017
local chile_share .0583 .0750 .1083 .1250 .1500 .1417 .1583 .1226

forvalues k = 1/8 {
	local yr : word `k' of `chile_year'
	local sh : word `k' of `chile_share'
	replace Year = `yr' in `=N1 + `k''
	* OfWomeninChamber is still a string variable at this point
	replace OfWomeninChamber = "`sh'" in `=N1 + `k''
}

* One row per country-year is kept: the row that comes first in the current
* row order. NOTE(review): an earlier comment described this as keeping the
* "final" observation - confirm against the ordering of the spreadsheet.
gen n = _n
egen group = group(c Year)
bysort group (n): keep if _n == 1
drop group n

* Strip "%" and "?" from the share, convert it to a number, and express it as
* a proportion: values above 1 are treated as percentages
replace OfWomeninChamber =													///
	subinstr(subinstr(OfWomeninChamber, "%", "", .), "?", "", .)
destring OfWomeninChamber, gen(femparl)
replace femparl = femparl / 100 if femparl > 1
replace femparl = round(femparl, .0001)

* Build the full country-by-year grid and carry the last known share forward
* into years without an entry of their own
fillin c Year
encode c, gen(c2)
tsset c2 Year
replace femparl = L.femparl if missing(femparl) & !missing(L.femparl)

keep c Year femparl
rename (c Year) (country2 year)

* We create temporary files, which we merge with the main data below
tempfile ipu
save `ipu', replace

/*------------------------------------------------------------------------------
 2. Quality of Government
------------------------------------------------------------------------------*/

import delimited "$data_qog\qog_std_ts_jan21.csv", clear

* The four indicators taken from the QoG file; the last three are divided by
* 100, the first is the ratio of two QoG variables
generate femmin = wgov_minfem / wgov_min
generate school_fem = bl_lhf / 100
generate work_fem = wdi_lfpfne15 / 100
generate turnout = ideavt_legvt / 100

keep cname year femmin school_fem work_fem turnout

* Linear interpolation over years within each country, useful for variables
* that are measured periodically (such as turnout). Each indicator is replaced
* by its interpolated version under the same name.
foreach v in femmin school_fem work_fem turnout {
	quietly {
		bysort cname: ipolate `v' year, generate(`v'_ip)
		drop `v'
		rename `v'_ip `v'
	}
}

* Some country names are changed to allow merging with the other datasets
replace cname = "Great Britain" if cname == "United Kingdom"
replace cname = "South Korea" if cname == "Korea, South"
replace cname = "France" if cname == "France (1963-)"
replace cname = "Cyprus" if cname == "Cyprus (1975-)"
rename cname country2

tempfile qog
save `qog', replace

/*------------------------------------------------------------------------------
 3. Varieties of Democracy
------------------------------------------------------------------------------*/

import delimited "$data_vdem\V-Dem-CY-Core-v11.csv", clear

* Here we only have to rename and drop some variables
keep country_name year v2x_gencl v2x_gencs

* The country variable is referred to by its full name. The abbreviation "c"
* would only resolve to country_name while variable abbreviation is switched
* on and no other variable in memory starts with "c".
replace country_name = "United States"										///
	if country_name == "United States of America"
replace country_name = "Great Britain" if country_name == "United Kingdom"

* country2 is the merge key shared with the other temporary files
rename country_name country2
rename v2x_gencl civillib
rename v2x_gencs civilsoc

tempfile vdem
save `vdem', replace

/*------------------------------------------------------------------------------
 4. World Values Survey/European Values Study
------------------------------------------------------------------------------*/

* EVS: only the variables needed below are read from the file
use S003 S020 S017 D059 F118 F120 F121										///
	using "$data_evs\ZA7503_v2-0-0.dta", clear

* Country names are taken from the value labels of S003 and trimmed
decode S003, gen(c)
replace c = strtrim(c)

generate year = S020
generate weight = S017

keep c year weight D059 F118 F120 F121

tempfile evs
save `evs', replace

* WVS: same variables, with one country name brought in line
use S003 S020 S017 D059 F118 F120 F121										///
	using "$data_wvs\WVS_Trend_v2_0.dta", clear

decode S003, gen(c)
replace c = "Czech Republic" if c == "Czech Rep."
generate year = S020
generate weight = S017
keep c year weight D059 F118 F120 F121

* Stack the EVS respondents underneath the WVS respondents
quietly append using `evs'

* Dropping some countries to save a little bit of time. inlist() accepts only
* a limited number of string arguments, hence the split into several calls.
keep if inlist(c, "Australia", "Austria", "Belgium", "Bulgaria", "Canada",	///
		"Chile", "Croatia", "Cyprus", "Czech Republic") |					///
	inlist(c, "Denmark", "Estonia", "Finland", "France", "Germany",			///
		"Great Britain", "Greece", "Hungary", "Iceland") |					///
	inlist(c, "Ireland", "Israel", "Italy", "Japan", "Latvia",				///
		"Lithuania", "Luxembourg", "Malta", "Mexico") |						///
	inlist(c, "Montenegro", "Netherlands", "New Zealand", "Norway",			///
		"Poland", "Portugal", "Romania", "Serbia", "Slovakia") |			///
	inlist(c, "Slovenia", "South Korea", "Spain", "Sweden",					///
		"Switzerland", "Turkey", "United States")

* We create our index of cultural values from three WVS/EVS items

* Codes -5 to -1 are set to missing first, so that they are not treated as
* substantive answers by any of the statistics computed below
recode F118 F120 F121 (-5/-1 = .)
recode D059 (-5/-1 = .), gen(polval)

* Scale reliability (mentioned in fn. 4 of the text), computed on the cleaned
* items
alpha F118 F120 F121

* The index is the sum of the three items divided by 30
gen culval = (F118 + F120 + F121) / 30
drop D059 F118 F120 F121

* Weighted country-year means of both measures
collapse culval polval [pw = weight], by(c year)

* To see the correlation between the two measures (see fn. 4 of the text)
corr culval polval
drop polval

* Years without WVS/EVS waves are added for the interpolation: one extra row
* per year is appended (assigned to the United States), and fillin then
* creates that year for every country
scalar N2 = _N
set obs `=N2 + 13'
replace c = "United States" if c == ""

local counter = 1
foreach i in 1978 1979 1980 1985 1986 1987 1988 1994 2002 2003 2014 2015 2016 {
	replace year = `i' in `=N2 + `counter''
	local ++counter
}

fillin c year
drop _fillin

encode c, gen(c2)
tsset c2 year

* We interpolate values if they are within 5 years of a wave.
* Step 1: linear interpolation between the observed waves of each country.
rename culval culvalold
bys c2: ipolate culvalold year, gen(culval)

* Step 2: culvaltemp is the number of years to the nearest observed wave of
* the same country: 0 in a wave year, 1 to 5 nearby, missing when no wave lies
* within 5 years. It has to be anchored on the observed series (culvalold);
* anchored on the interpolated series it would be 0 for every interpolated
* year and the 5-year rule could never remove anything.
gen culvaltemp = 0 if !missing(culvalold)

forval i = 1/5 {
	replace culvaltemp = `i' if culvaltemp == . & (L`i'.culvaltemp == 0 |	///
		F`i'.culvaltemp == 0)
}

* Step 3: interpolated values further than 5 years from any wave are removed
replace culval = . if missing(culvaltemp)
drop culvalold culvaltemp

rename c country2

tempfile wvsevs
save `wvsevs'

/*------------------------------------------------------------------------------
 5. Final data preparation
------------------------------------------------------------------------------*/

* Now we can combine all the files: the country-year files built above are
* attached to the main data on country2 and year
use "$data_main\persson_pop_data_slim.dta", clear
merge m:1 country2 year using `ipu', nogen
merge m:1 country2 year using `qog', nogen
merge m:1 country2 year using `vdem', nogen
merge m:1 country2 year using `wvsevs', nogen
format %20s country2

* Rows without a value for issue are dropped; this removes the country-years
* that exist only in the merged-in files
drop if issue == .
egen cy = group(country year)

* Variable names are written out in full so that the command does not depend
* on variable abbreviation
order country country2 c c2 year cy source issue polarea average_change		///
	women_change men_change diff polchange cong_women_dich cong_men_dich	///
	cong_women_cont cong_men_cont school_fem work_fem turnout femmin		///
	femparl, first
sort country year source issue

* Variable labels
label var country "Country code (string)"
label var country2 "Country name (string)"
label var c "Country code (numeric)"
label var c2 "Country name (numeric)"
label var year "Survey year"
label var cy "Country-year"
label var source "Survey source"
label var issue "Short issue wording"
label var polarea "Policy area"

label var average_change "Share of all respondents supporting change"
label var women_change "Share of women supporting change"
label var men_change "Share of men supporting change"
label var polchange "Policy change after five years"
label var diff "Men-minus-women's preferences"
label var cong_women_dich "Dichotomous congruence, women"
label var cong_men_dich "Dichotomous congruence, men"
* Each continuous congruence label names the group in the variable name
label var cong_women_cont "Continuous congruence, women"
label var cong_men_cont "Continuous congruence, men"

label var school_fem "Percentage with tertiary schooling, female"
label var work_fem "Labor force participation rate, female"
label var turnout "Parliamentary election: voter turnout"
label var femmin "Share of female ministers in government"
label var femparl "Share of female parliamentarians"
label var civillib "V-Dem women's civil liberties"
label var civilsoc "V-Dem women's civil society participation"
label var culval "Culturally progressive values, WVS/EVS"

save "$data_main\persson_pop_data_01_main.dta", replace
